import tkinter as tk
import tkinter
from tkinter import StringVar
from tkinter import scrolledtext
from collections import Counter
import math


# ---------------------------------------------------------------------------
# Main window
# ---------------------------------------------------------------------------
root = tk.Tk()
root.title("刘亚圣")
root.geometry("1000x600")

# Input row for string 1; the entry's content is read through StringVar `e`.
label = tk.Label(root, text="请输入字符串1:", fg="black", font=("宋体", 10))
label.grid(row=0, column=0)

e = StringVar()
entry1 = tk.Entry(root, textvariable=e)
entry1.grid(row=0, column=1)

# Input row for string 2; the entry's content is read through StringVar `b`.
label2 = tk.Label(root, text="请输入字符串2:", fg="black", font=("宋体", 10))
label2.grid(row=1, column=0)

b = StringVar()
entry2 = tk.Entry(root, textvariable=b)
entry2.grid(row=1, column=1)

# String 1: TF output area.
# This caption is positioned with grid(); the captions below use place().
label = tk.Label(root, text="字符串1 TF:", fg="black", font=("宋体", 10))
label.grid(row=2, column=0)

texta = scrolledtext.ScrolledText(root, width=35, height=15)
texta.place(x=30, y=90)

# String 1: TF-IDF output area.
label = tk.Label(root, text="字符串1 TF.IDF:", fg="black", font=("宋体", 10))
label.place(x=10, y=300)

texta1 = scrolledtext.ScrolledText(root, width=35, height=15)
texta1.place(x=30, y=330)

# IDF output area (shared by both strings).
label = tk.Label(root, text="IDF:", fg="black", font=("宋体", 10))
label.place(x=700, y=64)

textidf = scrolledtext.ScrolledText(root, width=35, height=15)
textidf.place(x=700, y=90)

# String 2: TF output area.
label = tk.Label(root, text="字符串2 TF:", fg="black", font=("宋体", 10))
label.place(x=330, y=64)

textb = scrolledtext.ScrolledText(root, width=35, height=15)
textb.place(x=350, y=90)

# String 2: TF-IDF output area.
label = tk.Label(root, text="字符串2 TF.IDF:", fg="black", font=("宋体", 10))
label.place(x=330, y=300)

textb1 = scrolledtext.ScrolledText(root, width=35, height=15)
textb1.place(x=350, y=330)


def _term_frequencies(text):
    """Return a dict mapping each character of *text* to its relative frequency.

    The frequency is ``occurrences / len(text)``. An empty *text* yields an
    empty dict (no division takes place).
    """
    total = len(text)
    return {ch: hits / total for ch, hits in Counter(text).items()}


def _inverse_document_frequencies(documents):
    """Return a dict mapping each character to ``log10(N / df)``.

    *N* is ``len(documents)`` and *df* is the number of documents that contain
    the character at least once. Empty documents still count towards *N*.
    """
    doc_total = len(documents)
    # set(doc) makes every document contribute at most once per character.
    doc_hits = Counter(ch for doc in documents for ch in set(doc))
    # math.log(x, 10) is kept so the numbers match the earlier implementation.
    return {ch: math.log(doc_total / hits, 10) for ch, hits in doc_hits.items()}


def _show_scores(widget, scores, caption):
    """Replace the content of text *widget* with one entry per character.

    Each entry is ``<char><caption><score>`` followed by a blank line.
    Entries are written in sorted character order so the display is the same
    on every run.
    """
    widget.delete("1.0", "end")
    rows = [ch + caption + str(scores[ch]) + '\n\n' for ch in sorted(scores)]
    widget.insert("end", "".join(rows))


def tfidf():
    """Compute character-level TF, IDF and TF-IDF for the two input strings.

    Reads the strings from the module-level StringVars ``e`` and ``b`` and
    writes the results into the text widgets ``texta``/``textb`` (TF),
    ``textidf`` (IDF) and ``texta1``/``textb1`` (TF-IDF). Every widget is
    cleared before it is refilled. Returns ``None``.
    """
    text_a = e.get()
    text_b = b.get()

    # Term frequency of every character within its own string.
    tf_a = _term_frequencies(text_a)
    tf_b = _term_frequencies(text_b)

    # Inverse document frequency over the two strings.
    idf = _inverse_document_frequencies([text_a, text_b])

    # TF-IDF is the per-character product of the two measures.
    tfidf_a = {ch: tf * idf[ch] for ch, tf in tf_a.items()}
    tfidf_b = {ch: tf * idf[ch] for ch, tf in tf_b.items()}

    _show_scores(texta, tf_a, '的tf为：')
    _show_scores(textb, tf_b, '的tf为：')
    _show_scores(textidf, idf, '的idf为：')
    _show_scores(texta1, tfidf_a, '的tfidf为：')
    _show_scores(textb1, tfidf_b, '的tfidf为：')

# Button that runs tfidf() when clicked; placed next to the second input row.
# NOTE(review): no root.mainloop() call is visible in this section — confirm
# the event loop is started elsewhere (or by the hosting environment).
q = tk.Button(text="计算TFIDF", command=tfidf)
q.grid(row=1, column=2)

